****************************************************************************
***** This do.file generates measures of inflation on continued 
***** goods across the product space (Quality-Modules) by states
****************************************************************************

global db "D:\Dropbox\unequal_gains\main_data"
global db2 "D:\Dropbox\unequal_gains\QJE revision plan\analysis"
cd "$db2\RMS\collapsed_files\state_level"
global Section4 "D:\Dropbox\unequal_gains\QJE revision plan\analysis\section4_data"

************************************
* (1) Inflation for continued goods
************************************

foreach i of numlist 2006(1)2014 {

display "Starting Tornqvist computation for year `i', state level"

local var=`i'+1

use state_`i', clear
rename total_quantity total_quantity_old
rename average_unit_price average_unit_price_old
gen upc_ver_uc=1
rename total_spending total_spending_old
save temp, replace

* now we get quality decile for each UPC (in the full price distribution, within modules)
collapse (sum) total_quantity_old total_spending_old, by(upc upc_ver_uc product_module_code)
gen average_unit_price_all=total_spending_old/total_quantity_old
* bring in info on UPC price decile (within product module) in base period:
merge 1:1 upc upc_ver_uc using "$db/Important Lists/upc_brand_size_final.dta"
keep if _merge==3
drop _merge
sum size1_amount, d
drop if size1_amount<r(p1) | size1_amount>r(p99)
gen average_unit_price_adj=average_unit_price_all/(size1_amount*multi)
* create quality ranks without weights
sort product_module_code average_unit_price_adj
bysort product_module_code: gen double temp=[_N]
bysort product_module_code: gen double temp2=[_n]
gen rank=temp2/temp*100
gen quality_rank=.
foreach r of numlist 1(1)10 {
replace quality_rank=`r' if rank<=`r'*10 & missing(quality_rank)
}
keep upc upc_ver_uc quality_rank

merge 1:m upc upc_ver_uc using temp
keep if _merge==3
drop _merge
rm temp.dta

merge 1:1 upc fips_state_code using state_`var'
keep if _merge==3

gen double price_ratio=average_unit_price/average_unit_price_old
* get rid of outliers
sum price_ratio, d
gen p1_p=r(p1)
gen p99_p=r(p99)
drop if price_ratio<p1_p | price_ratio>p99_p 

keep upc upc_ver_uc total_spending total_spending_old total_quantity total_quantity_old ///
average_unit_price average_unit_price_old price_ratio product_module_code quality_rank fips_state_code
duplicates drop

* (i) compute Tornqvist/Laspeyres/Paasche/CES price by quality_module
rename total_spending total_spending_all
rename total_spending_old total_spending_all_old
rename price_ratio price_ratio_all
rename total_quantity total_quantity_all
rename total_quantity_old total_quantity_all_old
rename average_unit_price average_unit_price_all
rename average_unit_price_old average_unit_price_all_old
* shares
bysort product_module quality fips_state_code: egen double total_spending_QM=sum(total_spending_all)
bysort product_module quality fips_state_code: egen double total_spending_QM_old=sum(total_spending_all_old)
gen double share_QM=total_spending_all/total_spending_QM
gen double share_QM_old=total_spending_all_old/total_spending_QM_old
* tornqvist
gen double weight_QM=1/2*(share_QM+share_QM_old)
gen double numerator_temp=weight*log(price_ratio_all)
bysort product_module quality fips_state_code: egen double numerator=sum(numerator_temp)
gen double tornqvist_price_index=exp(numerator)
drop numerator numerator_temp weight*
* ces price
gen double weight_num=(share_QM-share_QM_old)/(ln(share_QM)-ln(share_QM_old))
bysort product_module quality fips_state_code: egen double weight_den=sum(weight_num)
gen double weight=weight_num/weight_den
gen double numerator_temp=weight*log(price_ratio_all)
bysort product_module quality fips_state_code: egen double numerator=sum(numerator_temp)
gen double ces_price_index=exp(numerator)
drop numerator_temp numerator
* laspeyres
gen double numerator_temp=average_unit_price_all*total_quantity_all_old 
gen double denominator_temp=average_unit_price_all_old*total_quantity_all_old
bysort product_module quality fips_state_code: egen double numerator=sum(numerator_temp)
bysort product_module quality fips_state_code: egen double denominator=sum(denominator_temp)
gen laspeyres_price_index=numerator/denominator
drop numerator denominator numerator_temp denominator_temp
* paasche
gen double numerator_temp=average_unit_price_all*total_quantity_all
gen double denominator_temp=average_unit_price_all_old*total_quantity_all
bysort product_module quality fips_state_code: egen double numerator=sum(numerator_temp)
bysort product_module quality fips_state_code: egen double denominator=sum(denominator_temp)
gen paasche_price_index=numerator/denominator
drop numerator denominator numerator_temp denominator_temp

keep tornqvist_price_index ces_price_index laspeyres_price_index paasche_price_index ///
product_module quality fips_state_code total_spending_QM total_spending_QM_old
duplicates drop

* winsorize the data at 1% level
foreach x in tornqvist ces laspeyres paasche {
sum `x' [aw= total_spending_QM + total_spending_QM_old ], d
gen `x'_win=`x'
replace `x'_win=r(p1) if `x'_price_index<r(p1)
replace `x'_win=r(p99) if `x'_price_index>r(p99)
}

save "$db/Important Price Datasets/inflation_QM_`i'_RMS_state.dta", replace
}


*********************************
* (2) Measures of entry and exit 
*********************************
foreach i of numlist 2006(1)2015 {

local prev=`i'-1
local next=`i'+1

use state_`i', clear
gen upc_ver_uc=1

* now we get quality decile for each UPC (in the full price distribution, within modules)
collapse (sum) total_quantity total_spending, by(upc upc_ver_uc product_module_code)
gen average_unit_price_all=total_spending/total_quantity
* bring in info on UPC price decile (within product module) in base period:
merge 1:1 upc upc_ver_uc using "$db/Important Lists/upc_brand_size_final.dta"
keep if _merge==3
drop _merge
sum size1_amount, d
drop if size1_amount<r(p1) | size1_amount>r(p99)
gen average_unit_price_adj=average_unit_price_all/(size1_amount*multi)
* create quality ranks without weights
sort product_module_code average_unit_price_adj
bysort product_module_code: gen double temp=[_N]
bysort product_module_code: gen double temp2=[_n]
gen rank=temp2/temp*100
gen quality_rank=.
foreach r of numlist 1(1)10 {
replace quality_rank=`r' if rank<=`r'*10 & missing(quality_rank)
}
keep upc upc_ver_uc quality_rank

merge 1:m upc using state_`i'
keep if _merge==3
drop _merge

* identify new products (for the market as a whole)
merge m:1 upc upc_ver_uc using "$db/Important Lists/available_upcs_upto`prev'_final.dta"
drop if _merge==2
gen new=(_merge==1)
drop _merge

merge m:1 upc upc_ver_uc using "$db/Important Lists/available_upcs_post`i'_final.dta"
drop if _merge==2
gen exit=(_merge==1)
drop _merge

gen double total_spending_all=total_spending
gen double total_spending_new_all=total_spending*new
gen double total_spending_exit_all=total_spending*exit

keep upc upc_ver_uc total_spending_all total_spending_new_all total_spending_exit_all ///
product_module_code quality_rank fips_state_code

* compute spending on new/exiting goods by quality_module
bysort product_module quality fips_state_code: egen double total_spending_QM=sum(total_spending_all)
bysort product_module quality fips_state_code: egen double total_spending_new_QM=sum(total_spending_new_all)
bysort product_module quality fips_state_code: egen double total_spending_exit_QM=sum(total_spending_exit_all)

keep product_module quality total_spending_QM total_spending_new_QM total_spending_exit_QM fips_state_code
duplicates drop

gen ssnpQM  = total_spending_new_QM/total_spending_QM
gen ssepQM  = total_spending_exit_QM/total_spending_QM

gen year=`i'
save "$db/Important Price Datasets/RMSnew_exit_QM_`i'_state.dta", replace
}


*************************************************
** (3) Build outcome dataset at the state level
*************************************************

* (A) new/exiting goods

* prepare list of deparmtent codes, which are missing from the stata dataset
use "$Section4/outcomes.dta", clear
keep department_code product_module_code product_group_code
duplicates drop
save "$Section4/product_cat_list", replace

use "$db/Important Price Datasets/RMSnew_exit_QM_2006_state.dta", clear
foreach i of numlist 2007(1)2015 {
append using "$db/Important Price Datasets/RMSnew_exit_QM_`i'_state.dta"
}
* keep track of spending in long format 
foreach i of numlist 2006(1)2015 {
gen temp=total_spending_QM*(year==`i')
bysort product_module_code quality_rank fips_state_code: egen total_spending_QM_`i'=max(temp)
drop temp
}
* collapse dataset after keep only "balanced" panel
egen id=group(product_module_code quality_rank fips_state_code)
drop if missing(id)
tsset id year
rename ssep temp
gen ssepQM=L.temp
drop temp
drop if year==2006
* make sure we have a balanced panel: 
bysort product_module_code fips_state_code: gen N_QMyears=[_N]
* lot's have either 80 or 90 observations (since year 2010 missing for lots of them,
* we have either 8 or 9 years)
keep if N_QMy==80 | N_QMy==90
drop N_QMy

gen feenstra=(1-ssnp)/(1-ssep)
* there is a small number of obs for which ssnp=1 or ssep=0, then we can't compute feenstra
replace ssnp=0 if missing(feenstra)
replace ssep=0 if missing(feenstra)
replace feenstra=1 if missing(feenstra)

* winsorize feenstra at 5% level (by department/year)
* get info on dept code from analysis dataset
merge m:1 product_module_code using "$Section4/product_cat_list"
keep if _merge==3
drop _merge
bysort department_code year: egen p5 = pctile(feenstra), p(5)
bysort department_code year: egen p95 = pctile(feenstra), p(95)
gen feenstra_win = feenstra
replace feenstra_win = p95 if feenstra>p95
replace feenstra_win = p5 if feenstra<p5

* ensure that product modules don't switch product groups over time (happens for 20 obs)
bysort product_module_code: egen temp=min(product_group_code)
replace product_group_code=temp if product_group_code!=temp

* now bring in elasticities
merge m:1 product_module_code using "$db/Important Price Datasets/sigmas"
drop if _merge==2
drop _merge

foreach i in feenstra_win {
gen `i'_common=`i'^(1/(sigma_common-1))
gen `i'_Handbury=`i'^(1/(2.09-1))
gen `i'_Dube=`i'^(1/(4-1))
gen `i'_MR=`i'^(1/(7-1))
gen `i'_BW=`i'^(1/(11.5-1))
}
* save at disaggregated level (by year) to compute full inflation below
preserve
keep product_module_code quality fips_state_code feenstra_win_* year
save "$Section4/temp_RMS.dta", replace
restore

collapse (mean) ssnp ssep feenstra feenstra_win, by(fips_state_code quality_rank department_code product_group_code product_module_code total_spending_QM_2*)

* generate log spending variables
gen AvgDeltaLog_totspending = ( ///
log( (total_spending_QM_2014+total_spending_QM_2015)/2 ) ///
- log( (total_spending_QM_2007+total_spending_QM_2006)/2) ) / (2015-2007)
gen AvgLog_totspending = ( ///
log( (total_spending_QM_2014+total_spending_QM_2015)/2 ) ///
+ log( (total_spending_QM_2007+total_spending_QM_2006)/2) ) / 2

* now rename var and save dataset
foreach i in  total_spending_QM_2006 total_spending_QM_2007 total_spending_QM_2008 ///
  total_spending_QM_2009 total_spending_QM_2010 total_spending_QM_2011 total_spending_QM_2012 ///
  total_spending_QM_2013 total_spending_QM_2014 total_spending_QM_2015 ssnpQM ssepQM feenstra ///
  feenstra_win AvgDeltaLog_totspending AvgLog_totspending {
rename `i' RMS_`i'
}
gen RMS_newexit=1

save "$Section4/outcomes_state.dta", replace

* (B) Bring inflation on continued products and new goods together

use "$db/Important Price Datasets/inflation_QM_2006_RMS_state.dta", clear
gen year=2006
foreach i of numlist 2007(1)2014 {
append using "$db/Important Price Datasets/inflation_QM_`i'_RMS_state.dta"
replace year=`i' if missing(year)
}
* keep track of spending in long format 
foreach i of numlist 2006(1)2014 {
gen temp=(total_spending_QM+total_spending_QM_old)/2*(year==`i')
bysort product_module_code quality_rank: egen continued_spending_QM_`i'=max(temp)
drop temp
}
* collapse dataset after keep only "balanced" panel
bysort product_module_code fips_state_code: gen N_QMyears=[_N]
* have either 70 or 90 observations (since year 2010 missing for lots of them,
* we have either 7 or 9 years -- for some we lose 2 years because can't match to previous/next years)
keep if N_QMy==70 | N_QMy==90
drop N_QMy
* we lose quite a few datapoints here

* bring info on departments and winsorize
merge m:1 product_module_code using "$Section4/product_cat_list"
keep if _merge==3
drop _merge
foreach i in tornqvist_price_index ces_price_index laspeyres_price_index paasche_price_index {
bysort department_code year: egen p5 = pctile(`i'), p(5)
bysort department_code year: egen p95 = pctile(`i'), p(95)
gen `i'_win = `i'
replace `i'_win = p95 if `i'>p95
replace `i'_win = p5 if `i'<p5
drop p5 p95
}

* ensure that product modules don't switch product groups over time (happens for 20 obs)
bysort product_module_code: egen temp=min(product_group_code)
replace product_group_code=temp if product_group_code!=temp

* now bring the feenstra ratio 
replace year=year+1
merge 1:1 product_module_code quality year fips_state_code using "$Section4/temp_RMS.dta", keepusing(feenstra*)
drop if _merge==2
drop _merge

foreach i in common Handbury Dube MR BW {
gen fullces_pi_win_`i'=ces_price_index_win*feenstra_win_`i'
gen lfullces_pi_win_`i'=log(ces_price_index_win*feenstra_win_`i')
}

collapse (mean) paasche* laspeyres* tornqvist* ces* fullces* lfullces*, by(fips_state_code quality_rank department_code product_group_code product_module_code continued_spending_QM_2*)
distinct product_module_code

* generate log spending variables
gen AvgDeltaLog_contspending = ( ///
log( (continued_spending_QM_2013+continued_spending_QM_2014)/2 ) ///
- log( (continued_spending_QM_2007+continued_spending_QM_2006)/2) ) / (2015-2007-1)
gen AvgLog_contspending = ( ///
log( (continued_spending_QM_2013+continued_spending_QM_2014)/2 ) ///
+ log( (continued_spending_QM_2007+continued_spending_QM_2006)/2) ) / 2

* now rename var and merge to main dataset
foreach i in continued_spending_QM_2006 continued_spending_QM_2007 continued_spending_QM_2008 ///
continued_spending_QM_2009 continued_spending_QM_2010 continued_spending_QM_2011 continued_spending_QM_2012 ///
continued_spending_QM_2013 continued_spending_QM_2014 paasche_price_index paasche_price_index_win laspeyres_price_index ///
laspeyres_price_index_win tornqvist_price_index tornqvist_price_index_win ces_price_index ///
ces_price_index_win AvgDeltaLog_contspending AvgLog_contspending ///
 fullces_pi_win_common fullces_pi_win_Handbury fullces_pi_win_Dube fullces_pi_win_MR fullces_pi_win_BW ///
 lfullces_pi_win_common lfullces_pi_win_Handbury lfullces_pi_win_Dube lfullces_pi_win_MR lfullces_pi_win_BW {
rename `i' RMS_`i'
}
gen RMS_continued=1
merge 1:1 product_module_code quality_rank fips_state_code using "$Section4/outcomes_state.dta"
drop _merge

* now finalize dataset
foreach i in RMS_continued RMS_newexit {
replace `i'=0 if missing(`i')
}

distinct product_module_code if RMS_continued==1 & RMS_newexit==1
* we have 729 modules we can work with

* generate identifiers for outliers (with RMS spending weights)
foreach i in RMS_avg_Dlog_realspending ///
RMS_feenstra_win ///
RMS_ltornqvist_price_index_win ///
RMS_lfullces_pi_win_common  ///
inst_raw inst_raw_lvl inst_linageres inst_ageres inst_ageeducres ///
inst_ageeducstateres inst_allres inst_educres inst_stateres ///
inst_linageducres inst_linagestateres inst_linagerace inst_linagechild ///
inst_raceres inst_childrenres { 
gen `i'_o=1
sum `i' [aw=RMS_avg_spending], d
gen `i'_w=`i'
replace `i'_w=r(p95) if `i'_w>r(p95)
replace `i'_w=r(p5) if `i'_w<r(p5)
replace `i'_o=0 if inrange(`i',r(p5),r(p95))
}

save "$Section4/outcomes_state.dta", replace
